**************************************************************************************
* THIS DO-FILE CREATES THE DATA SET USED FOR THE ANALYSIS IN FALCKE & VINK (2020)
* Falcke, S. & Vink, M. (2020). Closing a Backdoor to Dual Citizenship. The German Citizenship Law Reform of 2000.
* do-file written by Swantje Falcke
* November 2020
**************************************************************************************

*-------------------------------------------------------------------------------------
* Identify the relevant sample
*-------------------------------------------------------------------------------------

* Load all pids
* ", clear" discards whatever is currently in memory, so the do-file can be re-run
* in the same session instead of stopping with "no; data in memory would be lost"
use ppathl.dta, clear
keep if migback==2 /* keep only first generation immigrants */

*-------------------------------------------------------------------------------------
* Merge with other relevant SOEP data sets
*-------------------------------------------------------------------------------------
* Every merge only adds variables to the immigrant sample: keep(master match) discards
* rows that exist solely in the using file (same effect as "drop if _merge==2").
* The match indicators _merge, _merge2, ..., _merge13 stay in the data.
* All person-year files are merged 1:1 on pid syear. The first two merges used to be
* declared 1:m, but the 1:1 merges that follow already require pid-syear to be unique
* in the master data, so 1:1 states the actual requirement and fails at the right place
* if a using file ever contains duplicate person-years.
merge 1:1 pid syear using pgen.dta, keep(master match)
merge 1:1 pid syear using pbrutto.dta, keep(master match) gen(_merge2)
merge 1:1 pid syear using pl.dta, keep(master match) gen(_merge3)
merge 1:1 pid syear using biol.dta, keep(master match) gen(_merge4)
merge 1:1 pid syear using pequiv.dta, keep(master match) gen(_merge5)
merge 1:1 pid syear using bioimmig.dta, keep(master match) gen(_merge6)
* NOTE(review): bdp_mig.dta is merged four times in a row. The repeats add no new
* variables apart from _merge8-_merge10; presumably four different files were meant.
* Kept as in the original until the intended file names are confirmed.
merge 1:1 pid syear using bdp_mig.dta, keep(master match) gen(_merge7)
merge 1:1 pid syear using bdp_mig.dta, keep(master match) gen(_merge8)
merge 1:1 pid syear using bdp_mig.dta, keep(master match) gen(_merge9)
merge 1:1 pid syear using bdp_mig.dta, keep(master match) gen(_merge10)
* Household-level information: many person-years per household-year
merge m:1 hid syear using hbrutto.dta, keep(master match) gen(_merge11)
/* 
To create the two following data sets that are merged:
use pid syear lb0287-lb0320 using bio.dta
foreach i in lb0287 lb0288 lb0289 lb0290 lb0291 lb0292 lb0293 lb0294 lb0295 lb0296 lb0297 lb0298 lb0300 lb0301 lb0302 lb0303 lb0304 lb0305 lb0306 lb0307 lb0308 lb0309 lb0310 lb0311 lb0312 lb0313 lb0314 lb0315 lb0316 lb0317 lb0318 lb0319 lb0320 {
by pid, sort: egen max`i'=max(`i')
}
bysort pid: gen seq=_n
drop if seq!=1
drop syear
save SurvivalChildMarriage.dta
use persnr kidgeb01 using biobirth.dta
rename persnr pid
save SurvivalChildMSample.dta
*/
* Person-level (time-constant) files: one row per pid, attached to every person-year
merge m:1 pid using SurvivalChildMarriage.dta, keep(master match) gen(_merge12)
merge m:1 pid using SurvivalChildMSample.dta, keep(master match) gen(_merge13)


*-------------------------------------------------------------------------------------
* Merge other data sets
*-------------------------------------------------------------------------------------
* The match indicators are named _merge14/_merge15: _merge12 and _merge13 are already
* created by the SurvivalChild* merges above, and reusing those names stops the do-file
* with "variable _merge12 already defined".
* NOTE(review): unlike the SOEP merges, rows found only in the using files are NOT
* dropped here, so country(-year) rows without any SOEP respondent remain in the data
* (with missing pid). Confirm that this is intended.
merge m:1 corigin using CountryCode.dta, gen(_merge14)
merge m:1 ccode3 syear using Reacquisition.dta, gen(_merge15)


*-------------------------------------------------------------------------------------
* Prepare data set
*-------------------------------------------------------------------------------------
* Information on nationality *
* nationality: pgnation with the codes -2, -1 and 999 set to missing
gen nationality=pgnation
replace nationality=. if inlist(nationality,-2,-1,999)

* nat2: pnat_v2 with the codes -2, -1, 777 and 999 set to missing
* (the comparisons are written as proper equality tests; the original had "nat2=777")
gen nat2=pnat_v2
label list pnat_v2
label values nat2 pnat_v2
replace nat2=. if inlist(nat2,-2,-1,777,999)

* natAdd: pnat_v1 with the codes 9, 8, 7, -1 and -2 set to missing.
* Its value label is attached to natAdd itself; the original attached it to nat2,
* which overwrote the pnat_v2 labelling of nat2 and left natAdd unlabelled.
gen natAdd=pnat_v1
label list pnat_v1
label values natAdd pnat_v1
replace natAdd=. if inlist(natAdd,9,8,-1,-2,7)

* Fill gaps: nat2 from natAdd, then nationality from nat2 (unless nat2 is 7)
replace nat2=natAdd if nat2==. 
replace nationality=nat2 if nationality==. & nat2!=7

* nationality_r: nationality with gaps filled by the person's previous observation.
* The carry-forward runs within pid in survey-year order. Without the by-prefix a
* person whose first observation is missing inherited the last nationality of the
* preceding person in the sort order.
gen nationality_r=nationality
label values nationality_r pgnation
bysort pid (syear): replace nationality_r = nationality_r[_n-1] if missing(nationality_r)

*Outcome variable: Being German in a respective year*
* 1 = nationality_r is 1, 0 = anything else (including missing nationality_r)
gen citizenshipGerman=1 if nationality_r==1 
replace citizenshipGerman=0 if nationality_r!=1

* ynat: change in citizenshipGerman relative to the person's previous observation.
*   1 = the year the person naturalised,
*  -1 = the person changed from German to a foreign nationality,
*   missing for a person's first observation.
* "(syear)" fixes the within-person order explicitly, because [_n-1], [1] and [_N]
* below all depend on it.
bysort pid (syear): gen ynat=citizenshipGerman-citizenshipGerman[_n-1]
bysort pid (syear): egen manynatchanges=min(ynat)


*Identify whether citizenshipGerman changes for an individual (if yes, this is someone that naturalizes)*
bysort pid (syear): gen naturalization=citizenshipGerman[1]!=citizenshipGerman[_N] if citizenshipGerman[1]!=1
replace naturalization=0 if naturalization==.
bysort pid (syear): egen naturalization2=max(naturalization) /* just to be able to identify (1) who naturalises but also  (2) to be able to exclude people that naturalize from German to another citizenship */


bysort pid (syear): gen citizenshipchange=citizenshipGerman[1]!=citizenshipGerman[_N] 
bysort pid (syear): egen citizenshipchange2=max(citizenshipchange)
bysort pid (syear): gen nonnat=citizenshipGerman[1]==citizenshipGerman[_N]

br pid syear nationality_r naturalization citizenshipGerman if naturalization2==0 & citizenshipchange2==1 & nonnat!=1
drop if naturalization2==0 & citizenshipchange2==1 & nonnat!=1 /*drop people who change nationality but who were first German and afterwards took another nationality */

* Generate variable on year of observed naturalisation ***
* ynat from above is reused. The original generated it a second time here, which stops
* the do-file with "variable ynat already defined". Re-computing is not needed: the drop
* above removes whole persons (all three flags are constant within pid), so the
* within-person differences of the remaining persons are unchanged.
gen ObsNatYear=syear if ynat==1

* Determine who naturalises before migrating *
* Cleaned copies of the reported naturalisation years (negative codes -> missing)
foreach v in plj0027 plj0019 {
    gen `v'_r=`v'
    replace `v'_r=. if inlist(`v'_r,-8,-5,-2,-1)
}
gen Myearnat_r=Myearnat
replace Myearnat_r=. if inlist(Myearnat,-5,-2,-1)

* Reported year of naturalisation: plj0027 first, then plj0019, then Myearnat
gen YearNat_imputed=cond(plj0027_r==., cond(plj0019_r==., Myearnat_r, plj0019_r), plj0027_r)
bysort pid: egen YearNat2_imputed=max(YearNat_imputed) /* to have this information in all observation years */

* Overall year of naturalisation: the observed switch if there is one, otherwise the reported year
gen YearNat_all=cond(ObsNatYear==., YearNat2_imputed, ObsNatYear)
bysort pid: egen YearNat2_all=max(YearNat_all)

* Years between immigration and naturalisation; left missing when immiyear is coded -1
gen TimingNat=YearNat2_all-immiyear if immiyear!=-1
label variable TimingNat "Timing of naturalisation: x years after immigration"

* Proxy naturalisation eligibility *
* eligibility = 1 once the required number of years since migration is reached:
* 3 years when married to a German citizen, otherwise 8 to 15 years depending on the
* year of immigration. It is 0 otherwise, and missing when the partner information is
* missing and none of the residence rules applies.
*
* The rules need years since migration and the married-to-a-German flag. In this
* do-file ysm and marriedCitizen are only created further down, so referring to them
* here stops the run with "variable ... not found". The inputs are therefore rebuilt
* here under temporary names, with the same definitions that are used later, so that
* the later "gen ysm" / "gen marriedCitizen" statements are not affected.
tempvar e_ysm e_married e_marcit

* years since migration; unknown when immiyear is missing or coded -1
gen `e_ysm'=syear-immiyear if immiyear!=-1

* married (partner codes 1 and 3) vs. not married; codes -2 and 5 stay missing
gen `e_married'=.
replace `e_married'=1 if partner==1 | partner==3
replace `e_married'=0 if partner!=. & partner!=-2 & partner!=5 & partner!=1 & partner!=3

* citizenship of the partner; GermanPartner is dropped again afterwards so that the
* PartnerCitizen merge further down finds the data exactly as it expects them
replace parid=. if parid==-2
merge m:1 parid syear using PartnerCitizen.dta, keepusing(GermanPartner) keep(master match) nogenerate
gen `e_marcit'=.
replace `e_marcit'=0 if `e_married'==0
replace `e_marcit'=0 if `e_married'==1 & GermanPartner==0
replace `e_marcit'=1 if `e_married'==1 & GermanPartner==1
drop GermanPartner

gen eligibility=0
replace eligibility=. if missing(`e_marcit')
* "<." guards against missing years since migration: Stata treats missing as larger
* than any number, so "ysm>=8" alone would be true for unknown ysm
replace eligibility=1 if `e_ysm'>=3 & `e_ysm'<. & `e_marcit'==1
replace eligibility=1 if `e_ysm'>=8 & `e_ysm'<. & immiyear>=1992
* immigration cohorts 1991 down to 1986 need 9, 10, ..., 14 years (= 2000 - immiyear)
forvalues cohort=1991(-1)1986 {
    replace eligibility=1 if `e_ysm'>=2000-`cohort' & `e_ysm'<. & immiyear==`cohort'
}
replace eligibility=1 if `e_ysm'>=15 & `e_ysm'<. & immiyear<=1985


* Information on who is an ethnic German *
* aussiedler is 1 in person-years where lb0020_v3 or the cleaned plj0006 equals 1,
* and missing otherwise
gen aussiedler=1 if lb0020_v3==1
gen plj0006_r=plj0006
label value plj0006_r plj0006
replace plj0006_r=. if inlist(plj0006_r,-8,-2,-1)
replace aussiedler=1 if plj0006_r==1
* Spread the information over all observations of a person; biimgrp==2 in any year
* also sets the person-level flag
bysort pid: egen aussiedler2=max(aussiedler)
bysort pid: egen aussiedler_biimgrp=max(biimgrp==2)
replace aussiedler2=1 if aussiedler_biimgrp==1
bysort pid: egen max_biimgrp=max(biimgrp)

* Information on who is an ethnic German, based on country of origin *
* corigin codes flagged below, in the order of the list:
*   22 Poland, 32 Russia, 74 Kazakhstan, 21 Romania, 3 Former Yugoslavia, 119 Croatia,
*   120 Bosnia, 222 Eastern Europe, 140 Kosovo-Albania, 78 Ukraine, 165 Serbia,
*   31 Czech Republic, 29 Bulgaria, 26 Hungary, 121 Macedonia, 77 Kyrgyzstan,
*   122 Slovenia, 75 Albania, 82 Tajikistan, 97 Uzbekistan, 123 Slovakia, 132 Belarus,
*   130 Azerbaijan, 148 Armenia, 146 Lithuania, 73 Moldova, 101 Estonia,
*   168 Montenegro, 42 Turkmenistan
gen originaussiedler=0
foreach code of numlist 22 32 74 21 3 119 120 222 140 78 165 31 29 26 121 77 122 75 82 97 123 132 130 148 146 73 101 168 42 {
    replace originaussiedler=1 if corigin==`code'
}

* region of origin *
* rorigin groups the three-letter country code ccode3 into 19 regions; a few corigin
* codes are assigned directly afterwards.
* Two codes were corrected compared with the original lists:
*   - Middle Africa listed "COD" twice; the second entry is now "COG" (Congo)
*   - Southern Africa listed "LSA"; the ISO code of Lesotho is "LSO". "LSA" is still
*     accepted so that data coded the old way are classified as before.
gen rorigin=.
label variable rorigin "Region of origin" 
local r1  DZA EGY LBY MAR SDN TUN ESH
local r2  IOT BDI COM DJI ERI ETH ATF KEN MDG MWI MUS MYT MOZ REU RWA SYC SOM SSD UGA TZA ZMB ZWE
local r3  AGO CMR CAF TCD COD COG GNQ GAB STP
local r4  BWA SWZ LSO LSA NAM ZAF
local r5  BEN CPV CIV GMB GHA GIN GNB LBR MLI MRT NER NGA SHN SEN SLE TGO
local r6  AIA ATG ABW BHS BRB BES VGB CYM CUB CUW DMA DOM GRD GLP HTI JAM MTQ MSR PRI BLM KNA LCA MAF VCT SXM TTO TCA VIR
local r7  BLZ CRI SLV GTM HND MEX NIC PAN
local r8  ARG BOL BVT BRA CHL COL ECU FLK GUF GUY PRY PER SGS SUR URY VEN
local r9  BMU CAN GRL SPM USA
local r10 KAZ KGZ TJK TKM UZB
local r11 CHN HKG MAC PRK JPN MNG KOR
local r12 BRN KHM IDN LAO MYS MMR PHL SGP THA TLS VNM
local r13 AFG BGD BTN IND IRN MDV NPL PAK LKA
local r14 ARM AZE BHR CYP GEO IRQ ISR JOR KWT LBN OMN QAT SAU PSE SYR TUR ARE YEM
local r15 BLR BGR CZE HUN POL MDA ROU RUS SVK UKR
local r16 ALA GGY JEY DNK EST FRO FIN ISL IRL IMN LVA LTU NOR SJM SWE GBR
local r17 ALB AND BIH HRV GIB GRC VAT ITA MLT MNE MKD PRT SMR SRB SVN ESP
local r18 AUT BEL FRA DEU LIE LUX MCO NLD CHE
local r19 FJI NCL PNG SLB VUT GUM KIR MHL FSM NRU MNP PLW UMI ASM COK PYF NIU PCN WSM TKL TON TUV WLF
forvalues region=1/19 {
    foreach iso of local r`region' {
        replace rorigin=`region' if ccode3=="`iso'"
    }
}
label define rorigin 1 "Northern Africa" 2 "Eastern Africa" 3 "Middle Africa" 4 "Southern Africa" 5 "Western Africa" 6 "Carribean" 7 "Central America" 8 "South America" 9 "Northern America" 10 "Central Asia" 11 "Eastern Asia" 12 "South-eastern Asia" 13 "Southern Asia" 14 "Western Asia" 15 "Eastern Europe" 16 "Northern Europe" 17 "Southern Europe" 18 "Western Europe" 19 "Oceania"
label values rorigin rorigin
* Origins that are assigned through corigin rather than ccode3
replace rorigin=17 if corigin==140 /* Kosovo-Albanians=Southern Europe */
replace rorigin=17 if corigin==3 /* Former Yugoslavia=Southern Europe */
replace rorigin=19 if corigin==41 | corigin==56 /* Australia and New Zealand */
replace rorigin=5 if corigin==94 /* Burkina Faso=Western Africa*/
replace rorigin=3 if corigin==143 /* Congo=Middle Africa */
replace rorigin=11 if corigin==154 /*Taiwan=Eastern Asia */

* Broader grouping: the African regions 2-5 and the American regions 6-8 are pooled,
* the remaining regions keep their own category (codes shifted down by 5)
gen rorigin_broad=.
replace rorigin_broad=1 if rorigin==1
replace rorigin_broad=2 if inrange(rorigin,2,5)
replace rorigin_broad=3 if inrange(rorigin,6,8)
replace rorigin_broad=rorigin-5 if inrange(rorigin,9,19)
label define broad 1 "Northern Africa" 2 "Sub-Saharan Africa" 3 "Latin America and the Caribbean" 4 "Northern America" 5 "Central Asia" 6 "Eastern Asia" 7 "South-eastern Asia" 8 "Southern Asia" 9 "Western Asia" 10 "Eastern Europe" 11 "Northern Europe" 12 "Southern Europe" 13 "Western Europe" 14 "Oceania"
label values rorigin_broad broad

* origin_broad: 1 "non-EU" (default), 2 "Turkey", 3 "EU".
* EU status is time-varying: a country of origin counts as EU from the threshold year
* given below onwards (thresholds as in the original code).
* Fix: the Czech Republic line used corigin==21, which is Romania. That made Romania
* EU from 2004 instead of 2007 and never classified the Czech Republic (corigin 31)
* as EU.
gen origin_broad=1
replace origin_broad=2 if corigin==2 /*Turkey */
replace origin_broad=3 if inlist(corigin,5,11,116,117,118) & syear>=1958 /* Italy, France, Luxembourg, Belgium, Netherlands */
replace origin_broad=3 if inlist(corigin,13,14,71) & syear>=1974 /* Denmark, UK, Ireland */
replace origin_broad=3 if corigin==4 & syear>=1981 /* Greece */
replace origin_broad=3 if inlist(corigin,6,28) & syear>=1986 /* Spain, Portugal */
replace origin_broad=3 if inlist(corigin,10,15,17) & syear>=1995 /* Austria, Sweden, Finland */
replace origin_broad=3 if inlist(corigin,22,26,31,122,123,101,103,146) & syear>=2004 /* Poland, Hungary, Czech Republic, Slovenia, Slovakia, Estonia, Latvia, Lithuania */
replace origin_broad=3 if inlist(corigin,21,29) & syear>=2007 /* Romania, Bulgaria */
replace origin_broad=3 if corigin==119 & syear>=2013 /* Croatia */
label define cbroad 1 "non-EU" 2 "Turkey" 3 "EU"
label values origin_broad cbroad

* EU dummy: 1 for EU origin, 0 for non-EU origin and Turkey
gen EU=.
replace EU=0 if origin_broad==1 | origin_broad==2
replace EU=1 if origin_broad==3

* Turkey dummy
gen Turkey=0
replace Turkey=1 if origin_broad==2

*-------------------------------------------------------------------------------------
* Operationalize Treatment
*-------------------------------------------------------------------------------------
*Treatment Step 1: Is migrant required to renounce origin citizenship while naturalising in Germany?*
* dualcit_grouped 2 -> renunciation 1; dualcit_grouped 1 or 3 -> renunciation 0
gen renunciation=1 if dualcit_grouped==2
replace renunciation=0 if inlist(dualcit_grouped,1,3)
* Imputation where still missing:
*   3   former Yugoslavia (mode of the countries that it was split up into)
*   140 Kosovo-Albania
*   31 32 119 120 121 122 165 168: data on Yugoslavia and the Soviet Union for the
*   years before its end
replace renunciation=1 if renunciation==. & inlist(corigin,3,140,31,32,119,120,121,122,165,168)

*Step 2: Can one reacquire foreign citizenship while residing in Germany *
* reacq_binary3 2 -> reacquisition 1; reacq_binary3 1 -> reacquisition 0
gen reacquisition=1 if reacq_binary3==2
replace reacquisition=0 if reacq_binary3==1
* Imputation where still missing, set to 0:
*   140 Kosovo-Albania (value from Serbia and Yugoslavia)
*   3   former Yugoslavia (mode of Bosnia and Herzegovina, Croatia, Macedonia,
*       Montenegro, Serbia)
*   120 Bosnia and Herzegovina, 119 Croatia, 121 Macedonia, 168 Montenegro,
*   165 Serbia, 122 Slovenia (value of former Yugoslavia)
*   31  value for Czechoslovakia before 1993
replace reacquisition=0 if reacquisition==. & inlist(corigin,140,3,120,119,121,168,165,122,31)
* Imputation where still missing, set to 1: 32 (value for Soviet Union before 1992)
replace reacquisition=1 if reacquisition==. & corigin==32

* Step 3: Treatment variable *
* NOTE(review): treatment is built from the raw reacq_binary3, not from the imputed
* reacquisition variable of step 2 - confirm that this is intended.
gen treatment=reacq_binary3-1 if inlist(reacq_binary3,1,2)
gen interaction = treatment * (syear >= 2000)
label var interaction "Difference-in-Differences"

* Alternative treatment: coming from Turkey *
gen treatment2=Turkey
gen interaction2 = treatment2 * (syear >= 2000)
label var interaction2 "Difference-in-Differences"

*-------------------------------------------------------------------------------------
* Operationalize DV and IV
*-------------------------------------------------------------------------------------
* Outcome, age squared, years since migration (and its square)
gen y=citizenshipGerman
gen age2=age^2
gen ysm=syear-immiyear
gen ysm2=ysm^2

* Years of education in four bands; everything outside the bands (including missing)
* is collected in category 999.
* NOTE(review): yeareduc_max is not created anywhere in this do-file; it has to be
* present in the merged data - confirm where it comes from.
gen yeareduc_cat=.
replace yeareduc_cat=1 if inrange(yeareduc_max,7,9)
replace yeareduc_cat=2 if inrange(yeareduc_max,9.5,12)
replace yeareduc_cat=3 if inrange(yeareduc_max,12.5,15)
replace yeareduc_cat=4 if inrange(yeareduc_max,15.5,18)
label define yeareduc 1 "7-9" 2 "10-12" 3 "13-15" 4 "16-19"
label values yeareduc_cat yeareduc
replace yeareduc_cat=999 if missing(yeareduc_cat)

* married: 1 for partner codes 1 and 3, 0 for all other codes except -2 and 5, which
* stay missing
gen married=.
replace married=1 if partner==1 | partner==3
replace married=0 if partner!=.  & partner!=-2 & partner!=5 & partner!=1 & partner!=3

* Citizenship of the partner, matched through the partner's id in the same year
replace parid=. if parid==-2
merge m:1 parid syear using PartnerCitizen.dta, gen(_mergeCitPartner) /* How this data set is created can be found in "FalckeVink2020_PartnerCitizen*/
drop if _mergeCitPartner==2
gen marriedCitizen=.
replace marriedCitizen=0 if married==0
replace marriedCitizen=0 if married==1 & GermanPartner==0
replace marriedCitizen=1 if married==1 & GermanPartner==1

* yeareduc is replaced by d11109 with the codes -1 and -2 set to missing
gen d11109_r=d11109
replace d11109_r=. if d11109_r==-1 | d11109_r==-2
drop yeareduc
rename d11109_r yeareduc

* Household income (i11103, codes -5 and -2 set to missing) in quartile groups;
* missing income becomes category 999
gen hhincome=i11103
replace hhincome=. if inlist(i11103,-5,-2)
egen hhincome_cat=cut(hhincome), group(4) label
replace hhincome_cat=999 if missing(hhincome_cat)

* working: 1 if pglfs is 11, 0 for any other valid code, missing if pglfs is missing
* or -1. The original condition read "pglfs!=.-1"; ".-1" evaluates to missing, so the
* code -1 was never excluded and ended up as "not working".
gen working=.
replace working=0 if pglfs!=. & pglfs!=-1
replace working=1 if pglfs==11

* Year of birth of the first to eighth child and a dummy for having a child below 18.
* The variable names of the original code are kept (including "eightschild_born").
local ordinals first second third fourth fifth sixth seventh eights
local sources  lb0287_h lb0290_h lb0293_h lb0296_h lb0299_h lb0302_h lb0305_h lb0308_h

* <ordinal>child_born: largest value of the source variable over all observations of a
* person, codes -5, -2 and -1 set to missing, two-digit years turned into 19xx.
* Fix: the original built the four-digit year by writing "19" in front of the loop
* counter, which turns the values 1-9 into 191-199 instead of 1901-1909. Adding 1900
* gives the same result for 10-99 and the correct one for 1-9.
* For the first child only the values 38-98 are converted, as in the original.
forvalues k=1/8 {
    local ord : word `k' of `ordinals'
    local src : word `k' of `sources'
    local lo=cond(`k'==1,38,1)
    local hi=cond(`k'==1,98,99)
    bysort pid: egen `ord'child_born=max(`src')
    replace `ord'child_born=. if inlist(`ord'child_born,-5,-2,-1)
    replace `ord'child_born=1900+`ord'child_born if inrange(`ord'child_born,`lo',`hi')
}

* Age of each child in the survey year; years before the birth are set to missing
forvalues k=1/8 {
    local ord : word `k' of `ordinals'
    gen child`k'_age=syear-`ord'child_born if `ord'child_born!=.
    replace child`k'_age=. if child`k'_age<0
}

* child18: 1 if any of the eight children is younger than 18 in the survey year
* (a missing age never satisfies "<18")
gen child18=0
label variable child18 "Child below 18"
forvalues k=1/8 {
    replace child18=1 if child`k'_age<18
}

* Federal state: bula with the codes -3, -2 and -1 set to missing
gen bula2=bula
replace bula2=. if inlist(bula,-3,-2,-1)
label value bula2 bula

* post2000*
* 0 for survey years before 2000, 1 otherwise
gen post2000=syear>=2000

* missing categories variables *
* copies in which missing values form their own category 999
gen working_m=working
replace working_m=999 if working==.
gen bula2_m=bula2
replace bula2_m=999 if bula2==.

* ", replace" lets the do-file be re-run; without it save stops with
* "file FalckeVink2020.dta already exists" on every run after the first
save FalckeVink2020.dta, replace

